/**
 * Static Utilities for writing files in ARFF format - the format used by WEKA.
 *
 * @note for READING files in ARFF format, see https://github.com/chesles/node-arff
 * 
 * @author Erel Segal-Halevi
 * @since 2013-08
 */


var _ = require('underscore')._;
var FeaturesUnit = require('../features');

/**
 * convert a single dataset to Weka ARFF string.
 * @param dataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: [1,2,3]}
 * @param relationName string for the @relation on the top of the file.
 * @param featureExtractor [optional]
 * @return an ARFF string. 
 */
exports.toARFF = function(dataset, relationName, featureExtractor) {
	if (!featureExtractor) featureExtractor=_.identity;
	
	var featureLookupTable = new FeaturesUnit.FeatureLookupTable();
	
	// Extract the input attributes (- features):
	dataset.forEach(function(datum) {
		datum.input = featureExtractor(datum.input, {});
		if (!_.isObject(datum.input))
			throw new Error("Expected feature vector to be a hash, but found "+JSON.stringify(datum.input));
		featureLookupTable.addFeatures(datum.input);
	});
	
	// Extract the target attributes (- classes):
	dataset.forEach(function(datum) {
		if (!_.isArray(datum.output))
			datum.output = [datum.output];
		datum.output = datum.output.map(function(anOutput) {
			return _.isString(anOutput)? anOutput: JSON.stringify(anOutput);
		});
		featureLookupTable.addFeatures(datum.output);
	});

	//console.dir(featureLookupTable);
	return toARFFLocal(dataset, relationName, featureLookupTable);
}

/**
 * convert many dataset to Weka ARFF files.
 * @param mapFileNameToDataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: [1,2,3]}
 * @return an ARFF file. 
 */
exports.toARFFs = function(outputFolder, mapFileNameToDataset, featureExtractor) {
	if (!featureExtractor) featureExtractor=_.identity;
	var featureLookupTable = new FeaturesUnit.FeatureLookupTable();
	
	// Extract the input attributes (- features):
	for (var relationName in mapFileNameToDataset) {
		mapFileNameToDataset[relationName].forEach(function(datum) {
			datum.input = featureExtractor(datum.input, {});
			if (!_.isObject(datum.input))
				throw new Error("Expected feature vector to be a hash, but found "+JSON.stringify(datum.input));
			featureLookupTable.addFeatures(datum.input);
		});
	}
	
	
	// Extract the target attributes (- classes):
	for (var relationName in mapFileNameToDataset) {
		mapFileNameToDataset[relationName].forEach(function(datum) {
			if (!_.isArray(datum.output))
				datum.output = [datum.output];
			datum.output = datum.output.map(function(anOutput) {
				return _.isString(anOutput)? anOutput: JSON.stringify(anOutput);
			});
			featureLookupTable.addFeatures(datum.output);
		});
	}
	

	//console.dir(featureLookupTable);

	var fs = require('fs');
	for (var relationName in mapFileNameToDataset) {
		fs.writeFileSync(outputFolder+"/"+relationName+".arff", 
			toARFFLocal(mapFileNameToDataset[relationName], relationName, featureLookupTable));
	}
}


/**
 * Convert a single, already-normalized dataset to a Weka ARFF string.
 *
 * Every entry of featureLookupTable.featureIndexToFeatureName becomes one
 * "@attribute <name> {0,1}" line; characters outside [a-zA-Z0-9] in a feature
 * name are replaced by "_". Each sample becomes one comma-separated data row
 * produced by featureLookupTable.hashToArray from the sample's input features
 * plus its output labels (each label set to 1).
 *
 * @param dataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: ["label1", ...]}.
 *        The output must already be an array. The samples are not modified.
 * @param relationName string for the @relation on the top of the file. A name that contains whitespace,
 *        quotes, commas, braces, a percent sign or a backslash is wrapped in single quotes (with
 *        backslash escapes), because it could not be read back as a single token otherwise.
 * @param featureLookupTable maps features to indices; must provide featureIndexToFeatureName and hashToArray(hash).
 * @return an ARFF string.
 * @throws Error if a feature name in the lookup table is neither undefined nor a string.
 */
var toARFFLocal = function(dataset, relationName, featureLookupTable) {
	var relation = String(relationName);
	if (/[\s'",{}%\\]/.test(relation)) {
		// Escape backslashes first, so the escapes added afterwards are not doubled.
		relation = "'" + relation
			.replace(/\\/g, "\\\\")
			.replace(/'/g, "\\'")
			.replace(/\n/g, "\\n")
			.replace(/\r/g, "\\r")
			.replace(/\t/g, "\\t") + "'";
	}

	var lines = [
		"% Automatically generated by Node.js",
		"@relation " + relation
	];

	featureLookupTable.featureIndexToFeatureName.forEach(function(featureName) {
		if (featureName === undefined) {
			// An undefined entry in the table is declared as an attribute literally named "undefined".
			lines.push("@attribute undefined {0,1}");
		} else if (Object.prototype.toString.call(featureName) !== "[object String]") {
			throw new Error("Expected featureName to be a string, but found "+JSON.stringify(featureName));
		} else {
			lines.push("@attribute " + featureName.replace(/[^a-zA-Z0-9]/g, "_") + " {0,1}");
		}
	});

	// Blank line between the header and the data section.
	lines.push("", "@data");

	dataset.forEach(function(datum) {
		// Shallow copy, so that adding the labels below does not touch datum.input.
		var datumArff;
		if (Array.isArray(datum.input)) {
			datumArff = datum.input.slice();
		} else {
			datumArff = {};
			for (var feature in datum.input)
				datumArff[feature] = datum.input[feature];
		}

		for (var i = 0; i < datum.output.length; ++i)
			datumArff[datum.output[i]] = 1;

		lines.push(featureLookupTable.hashToArray(datumArff).join(","));
	});

	return lines.join("\n") + "\n";
};

